package com.gylang.crawler.crawler.dy2018;

import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.regex.Pattern;

/**
 * Common base for the dy2018 page processors: exposes the site root URL, the
 * pre-compiled regular expressions used to pull labelled fields out of page
 * text, and a single {@link Site} configuration returned by {@link #getSite()}.
 *
 * <p>Created 2021/5/27.
 *
 * @author gylang
 */
public abstract class BaseDy2018PageProcessor implements PageProcessor {

    // ---- plain string constants ------------------------------------------

    /** Root URL of the target site (no trailing slash). */
    public static final String BASE_URL = "https://www.dy2018.com";

    /**
     * Whitespace-only literal: a leading ASCII space followed by full-width
     * spaces. How subclasses use it is not visible in this class.
     */
    public static final String BLANK_7 = " 　　　　　　";

    // ---- field-extraction patterns ---------------------------------------
    // Each pattern has exactly one capturing group holding the field value.

    /**
     * "Date" label followed by a run of word characters and hyphens.
     * NOTE(review): this is the only label matched with a full-width colon;
     * every other label below uses an ASCII colon — confirm this is intended.
     */
    public static final Pattern DATE_P = Pattern.compile("日期：([\\w-]*)");

    /** First run of one or more digits. */
    public static final Pattern NUMBER_P = Pattern.compile("(\\d+)");

    /** "Type" label followed by CJK ideographs (U+4E00 to U+9FA5) and slashes. */
    public static final Pattern TYPE_P = Pattern.compile("类型:([\u4e00-\u9fa5/]*)");

    /** "Alias" label; the value runs until the next '◎', '&lt;' or '&gt;'. */
    public static final Pattern ALIAS_NAME_P = Pattern.compile("别名:([^◎<>]*)");

    /** "Starring" label; the value runs until the next '◎', '&lt;' or '&gt;'. */
    public static final Pattern ACTOR_P = Pattern.compile("主演:([^◎<>]*)");

    /** "Director" label; the value runs until the next '◎', '&lt;' or '&gt;'. */
    public static final Pattern DIRECT_P = Pattern.compile("导演:([^◎<>]*)");

    /** "Movie title" label; the value runs until the next '◎', '&lt;' or '&gt;'. */
    public static final Pattern MOVIE_NAME_P = Pattern.compile("片名:([^◎<>]*)");

    /** "Tags" label; the value runs until the next '◎', '&lt;' or '&gt;'. */
    public static final Pattern LABEL_P = Pattern.compile("标签:([^◎<>]*)");

    // ---- crawler configuration -------------------------------------------

    /**
     * Site settings handed out by {@link #getSite()}: 3 retries and a timeout
     * of 10000 (presumably milliseconds — confirm against the WebMagic docs).
     * The same instance is returned on every call.
     */
    private static final Site SITE = Site.me().setRetryTimes(3).setTimeOut(10_000);

    /**
     * Supplies the crawler configuration for this processor.
     *
     * @return the single {@link Site} instance held by this class
     */
    @Override
    public Site getSite() {
        return SITE;
    }
}
